/*******************************************************************************
* Long-term effects of weather-induced migration on urban labor and housing 
  markets
* Busso & Chauvin

* Purpose: Calculate the characteristics of internal migrants who arrived in 
urban areas in the 2000–2010 period.

*******************************************************************************/

/*******************************************************************************
	ENVIRONMENT
*******************************************************************************/
global geo mcarp8010
global geocodefile "${db_inp}/geo_codes_sat_cities.dta"

/*******************************************************************************
	DROUGHT AND TYPO OF SHOCK VARIABLES
*******************************************************************************/
/* Data */
* Load SPEI-12 data
use "$db_inp/spei12_yearly_mca8010", replace

* Keep only years 1998-2009
keep if year >= 1998 & year <= 2010

* Drought threshold
local dr 1

/* Generate the drought shock */
* Sort by location, year to identify droughts in a 3-year span
sort mca8010 year

* Reverse values in original SPEI-12 data and replace any "wet" month as zero
forvalues i = 1/12 {
	replace month`i' = -month`i'
}

* Yearly sum
egen yearly_sum = rowtotal(month*)

* Sum three previous years of SPEI data of each MCA
by mca8010: gen shock3yrs = yearly_sum[_n-1] + yearly_sum[_n-2] + yearly_sum[_n-3]

* Compute the average of 3-year drought
by mca8010: gen shock = shock3yrs/36

/* Shock measures */
* Drought Shock 1: 36-month average values over 1 being drought shocks
gen shock1 = 0 if  shock >= -`dr' & shock <= `dr'
replace shock1 = 1 if shock < -`dr' | shock > `dr'

* Rename year variable to merge with year of migration of the Census dataset
rename year yearmig

keep mca8010 yearmig shock1

save "$da_tmp/drought", replace

/*******************************************************************************
	LABOR CHARACTERISTICS OF INTERNAL MIGRANTS
*******************************************************************************/
* Load 2010 individual housing Census data
use mca8010_o mcarp8010 wtper pid workage sex schcomp emp ///
rate_inform l_wage renter mgyrsmuni migrant_urb_urb migrant_rur_urb migrant ///
age using "${db_tmp}/${geo}_pcity_ind_10", replace 

* Make variables used in the table
gen age_mig = age - mgyrsmuni
gen workage_mig = age_mig >= 15 & age_mig <= 64 if !missing(mgyrsmuni)
gen agemig30 = (age_mig >= 15 & age_mig <= 30) if age_mig != . & workage_mig
gen agemig31 = (age_mig > 30) if age_mig != . & workage_mig

gen noprim = (schcomp == 1) if schcomp != .
gen nohs2 = (schcomp == 2 | schcomp == 3 ) if schcomp != .
gen hs = (schcomp == 4 | schcomp == 5) if schcomp != .
gen female_mig = (sex == 2) if sex != . & workage_mig
gen noprim_mig = noprim if workage_mig
gen nohs2_mig = nohs2 if workage_mig
gen hs_mig = hs if workage_mig
drop noprim nohs2 hs

gen year = 2010

/* Merge with drought data and generate by type of migration variables */
* Merge with drought data
gen yearmig = (year - mgyrsmuni) if mgyrsmuni != .
rename mca8010_o mca8010
merge m:1 mca8010 yearmig using "$da_tmp/drought", keepusing(shock1) nogen

tempfile labor_ch
save `labor_ch', replace

* Generate subcategories based on type of migrantion
global vars workage_mig female_mig agemig30 agemig31 noprim_mig nohs2_mig hs_mig ///
emp emp rate_inform rate_inform l_wage l_wage renter renter

foreach if in "if migrant_urb_urb == 1" "if migrant_rur_urb == 1 & shock1 == 0" "if migrant_rur_urb == 1 & shock1 == 1" "if mcarp8010 != 1000000" "if migrant == 0" { 

    * Assign condition labels
    if "`if'" == "if migrant_urb_urb == 1" local cond = "u"
    if "`if'" == "if migrant_rur_urb == 1 & shock1 == 0" local cond = "n"
    if "`if'" == "if migrant_rur_urb == 1 & shock1 == 1" local cond = "d"
    if "`if'" == "if mcarp8010 != 1000000" local cond = "urb"
    if "`if'" == "if migrant == 0" local cond = "res"

    local contador = 0
    foreach x in $vars {        
        local contador = `contador' + 1
        use `labor_ch', clear    

        * Check if the variable is `workage_mig` to calculate the sum
        if "`x'" == "workage_mig" {
            collapse (sum) mean_`cond' = `x' `if' [pw = wtper]
        }
        else {
            collapse (mean) mean_`cond' = `x' `if' [pw = wtper]
        }

        gen variable = "`x'"
		
		gen order = `contador'

        * Save results to a temporary file
        if `contador' == 1 {
            tempfile descriptive_stats_`cond'
            save `descriptive_stats_`cond'', replace
        }    
        else {
            append using `descriptive_stats_`cond''
            save `descriptive_stats_`cond'', replace    
        }    
    }
}

* Combine results from all conditions
use `descriptive_stats_u', replace
merge 1:1 variable order using `descriptive_stats_n', nogen
merge 1:1 variable order using `descriptive_stats_d', nogen
merge 1:1 variable order using `descriptive_stats_urb', nogen
merge 1:1 variable order using `descriptive_stats_res', nogen

sort order
order variable mean_u mean_n mean_d mean_urb mean_res order

replace mean_u = exp(mean_u) if variable == "l_wage"
replace mean_n = exp(mean_n) if variable == "l_wage"
replace mean_d = exp(mean_d) if variable == "l_wage"
replace mean_urb = exp(mean_urb) if variable == "l_wage"
replace mean_res = exp(mean_res) if variable == "l_wage"

tempfile labor_ch_2
save `labor_ch_2', replace

/*******************************************************************************
	HOUSING CHARACTERISTICS OF INTERNAL MIGRANTS
*******************************************************************************/
* Load 2010 individual housing Census data
use mca8010 mcarp8010 wtper pid mgyrsmuni l_rent migrant_urb_urb migrant_rur_urb ///
migrant using "${db_tmp}/${geo}_phous_ind_10", replace

gen year = 2010

/* Merge with drought data & produce "by type of migrant" variables */
* Merge with drought data
gen yearmig = (year - mgyrsmuni) if mgyrsmuni != .
merge m:1 mca8010 yearmig using "$da_tmp/drought", keepusing(shock1) nogen

tempfile housing_ch
save `housing_ch', replace

global vars "l_rent l_rent l_rent"

* Generate subcategories based on type of migrant
foreach if in "if migrant_urb_urb == 1" "if migrant_rur_urb == 1 & shock1 == 0" "if migrant_rur_urb == 1 & shock1 == 1" "if mcarp8010 != 1000000" "if migrant == 0" { 

    * Assign condition labels
    if "`if'" == "if migrant_urb_urb == 1" local cond = "u"
    if "`if'" == "if migrant_rur_urb == 1 & shock1 == 0" local cond = "n"
    if "`if'" == "if migrant_rur_urb == 1 & shock1 == 1" local cond = "d"
    if "`if'" == "if mcarp8010 != 1000000" local cond = "urb"
    if "`if'" == "if migrant == 0" local cond = "res"

    local contador = 0
    foreach x in $vars {        
        local contador = `contador' + 1
        use `housing_ch', clear    

        * Check if the variable is `workage_mig` to calculate the sum

        collapse (mean) mean_`cond' = `x' `if' [pw = wtper]
      
        gen variable = "`x'"
		
		gen order = 15 + `contador'

        * Save results to a temporary file
        if `contador' == 1 {
            tempfile descriptive_stats_`cond'
            save `descriptive_stats_`cond'', replace
        }    
        else {
            append using `descriptive_stats_`cond''
            save `descriptive_stats_`cond'', replace    
        }    
    }
}

* Combine results from all conditions
use `descriptive_stats_u', replace
merge 1:1 variable order using `descriptive_stats_n', nogen
merge 1:1 variable order using `descriptive_stats_d', nogen
merge 1:1 variable order using `descriptive_stats_urb', nogen
merge 1:1 variable order using `descriptive_stats_res', nogen

replace mean_u = exp(mean_u)
replace mean_n = exp(mean_n) 
replace mean_d = exp(mean_d)
replace mean_urb = exp(mean_urb)
replace mean_res = exp(mean_res)

append using `labor_ch_2'

tempfile characteristics
save `characteristics', replace

/*******************************************************************************
	RURAL AVERAGE RENT
*******************************************************************************/
cd "${census_harm}"
unzipfile Census_10

/* Include the average rural rent and residualized rent */
use pid wtper muni mgyrsmuni mgprevmuni rent using "${census_harm}/Census_10", replace

merge m:1 muni using "${geocodefile}", nogen keepusing(${geo}) keep(match master)

* Keep only rural observations
keep if mcarp8010 == 1000000

* Keep one rent per family
replace rent = . if pid != 1 | rent >= 990000
	
* Log rent
gen l_rent = ln(rent)
replace l_rent = . if l_rent == 0

collapse (mean) l_rent [pw = wtper]
gen rent = exp(l_rent)

sum rent
local mean_rent = r(mean)

/*******************************************************************************
	FORMAT
*******************************************************************************/
use `characteristics', replace

sort order
order variable mean_u mean_n mean_d mean_urb mean_res order 

ds mean*
foreach var in `r(varlist)' {
    replace `var' = `var' / 1000 if variable == "workage_mig"
}

ds mean*
foreach var in `r(varlist)' {
    replace `var' = `var'*100 if variable != "workage_mig" & variable != "l_wage" & variable != "l_rent"
}

ds mean*
foreach var in `r(varlist)' {
    replace `var' = `var' - mean_urb if order == 9 | order == 11 | order == 15
}

ds mean*
foreach var in `r(varlist)' {
    replace `var' = 100*(`var'/`mean_rent') if order == 18
}

ds mean*
foreach var in `r(varlist)' {
    replace `var' = 100*(`var'/mean_res) if order == 13 | order == 17
}

drop mean_urb mean_res

ds mean*
foreach var in `r(varlist)' {
	gen `var'_s = string(`var', "%20.1fc") if order != 1 & order != 12 & order != 16
}

ds *_s
foreach var in `r(varlist)' {
	replace `var' = `var' + "\%" if order != 1 & order != 12 & order != 16 & order != 9 & order != 11 & order != 15
}

local vars = "mean_u mean_n mean_d"
foreach var in `vars' {
	gen `var'_s2 = string(`var', "%20.0fc") if order == 1 | order == 12 | order == 16
}

ds *_s
foreach var in `r(varlist)' {
	replace `var' = `var'2 if order == 1 | order == 12 | order == 16
}

drop *s2

keep variable *_s

replace variable = "Working-age rural--urban migrants (in 1000s)" in 1
replace variable = "\medskip Percent of females" in 2
replace variable = "~~~~~Percent 15--30" in 3
replace variable = "~~~~~Percent 31 or older" in 4
replace variable = "~~~~~Percent less than primary" in 5
replace variable = "~~~~~Percent primary but less than high school" in 6
replace variable = "~~~~~Percent high school or higher" in 7
replace variable = "Employment rate" in 8
replace variable = "\medskip ~~~~~Difference from the urban average (pp.)" in 9
replace variable = "Informality rate" in 10
replace variable = "\medskip ~~~~~Difference from the urban average (pp.)" in 11
replace variable = "Wages (in 2010 BRL)" in 12
replace variable = "~~~~~Relative to nonmigrant urban residents" in 13
replace variable = "Percentage of households that rent" in 14
replace variable = "\medskip ~~~~~Difference from the urban average (pp.)" in 15
replace variable = "Rent (in 2010 BRL)" in 16
replace variable = "~~~~~Relative to nonmigrant urban residents" in 17
replace variable = "~~~~~Relative to rural municipality of origin" in 18

egen latex = concat(*), p("&")
replace latex = latex + "\\"
keep latex

insobs 5, before(1)
replace latex = "\begin{tabular}{lrrr}" in 1
replace latex = "\toprule & & \multicolumn{1}{c}{Rural--urban from} & \multicolumn{1}{c}{Rural--urban from} \\" in 2
replace latex = "& \multicolumn{1}{c}{Urban--urban} & \multicolumn{1}{c}{moderate-weather} & \multicolumn{1}{c}{severe-weather} \\" in 3
replace latex = "& & \multicolumn{1}{c}{origins} & \multicolumn{1}{c}{origins} \\" in 4
replace latex = "\cmidrule{2-4}\multicolumn{4}{l}{\textbf{Panel A: demographic characteristics}} \\" in 5

insobs 2, before(13)
replace latex = "& & & \\" in 13
replace latex = "\multicolumn{4}{l}{\textbf{Panel B: labor-market performance in destination cities in 2010}} \\" in 14

insobs 2, before(21)
replace latex = "& & & \\" in 21
replace latex = "\multicolumn{4}{l}{\textbf{Panel C: housing conditions in destination cities in 2010}} \\" in 22

insobs 2, after(27)
replace latex = "\bottomrule &  &  &  \\" in 28
replace latex = "\end{tabular}" in 29

cap file close myfile
file open myfile using "$da_out/tables/appendix_table_2.tex", write replace

forv i = 1/`=_N'{
	loc t0 = latex in `i'
	file write myfile `"`t0'"' _n 
} 

file close myfile

/* Delete files */
cap rm "Census_10.dta"
cap rm "$da_tmp/drought.dta"
global years "91 00 10" 
foreach y in $years { 
    cap rm "${db_tmp}/${geo}_pcity_ind_`y'.dta" 
    cap rm "${db_tmp}/${geo}_phous_ind_`y'.dta" 
}

